The data can be found at covid19ireland-geohive.hub.arcgis.com, specifically:
The data can be loaded using the read_csv function from the readr package. When loading the data the date column can be changed to a date type by using as.Date, this makes it easier to sort and filter by date.
library(readr)
library(dplyr)
# LaboratoryLocalTimeSeriesHistoricView
testing_csv <-
read_csv("http://opendata-geohive.hub.arcgis.com/datasets/f6d6332820ca466999dbd852f6ad4d5a_0.csv?outSR={%22latestWkid%22:3857,%22wkid%22:102100}") %>%
mutate(DateStamp = as.Date(Date_HPSC))
# Covid19AcuteHospitalHistoricSummaryOpenData
hospital_csv <-
read_csv("http://opendata-geohive.hub.arcgis.com/datasets/fe9bb23592ec4142a4f4c2c9bd32f749_0.csv?outSR={%22latestWkid%22:4326,%22wkid%22:4326}") %>%
mutate(DateStamp = as.Date(Date))
# ICUBISHistoricTimelinePublicView
icu_csv <-
read_csv("https://opendata.arcgis.com/datasets/c8208a0a8ff04a45b2922ae69e9b2206_0.csv") %>%
mutate(DateStamp = as.Date(extract))
# CovidStatisticsProfileHPSCIrelandOpenData
profile_csv <-
read_csv("https://opendata.arcgis.com/datasets/d8eb52d56273413b84b0187a4e9117be_0.csv") %>%
mutate(DateStamp = as.Date(StatisticsProfileDate))
cat("### Tidy and Merge Data
The 3 datasets can be joined together using the `left_join` function.
Extra statistics can also be calculated, e.g. the number of daily positive tests can be calculated by taking `Positive - lag(Positive)`. The `lag` function returns the previous value for a vector, therefore `Positive - lag(Positive)` will calculate today's value minus yesterday's value.
The `rollsum` function calculates a rolling total, the argument `k = 14` tells the function to calculate 14 day totals.")
The 3 datasets can be joined together using the left_join function.
Extra statistics can also be calculated, e.g. the number of daily positive tests can be calculated by taking Positive - lag(Positive). The lag function returns the previous value for a vector, therefore Positive - lag(Positive) will calculate today’s value minus yesterday’s value.
The rollsum function calculates a rolling total, the argument k = 14 tells the function to calculate 14 day totals.
# library(zoo)
my_rolling_sum <- function(x, n_days = 14) zoo::rollsum(x - lag(x), k = n_days, align = "right", fill = NA)
covid_tidy <-
testing_csv %>%
arrange(DateStamp) %>%
# select(DateStamp, TotalLabs, Positive, Hospitals) %>%
transmute(DateStamp,
daily_labs = TotalLabs - lag(TotalLabs),
daily_positive = Positive - lag(Positive),
rolling14_labs = my_rolling_sum(TotalLabs),
rolling14_positive = my_rolling_sum(Positive),
roling14_percentage = 100*(rolling14_positive/rolling14_labs)) %>%
left_join(hospital_csv %>%
arrange(DateStamp) %>%
select(DateStamp,
hospital_cases = SUM_number_of_confirmed_covid_1,
hospital_new = SUM_no_new_admissions_covid19_p,
hospital_discharge = SUM_no_discharges_covid19_posit),
by = "DateStamp") %>%
left_join(icu_csv %>%
select(DateStamp,
icu_cases = ncovidconf,
icu_new = adcconf),
by = "DateStamp")
Summaries of the data can now be calculated, such as the statistics on a month by month basis.
covid_table <-
covid_tidy %>%
mutate(month = format(DateStamp, "%m"), month_name = format(DateStamp, "%B")) %>%
group_by(month, month_name) %>%
summarise(total_tests = sum(daily_labs, na.rm = T),
total_positive = sum(daily_positive, na.rm = T),
no_obs = length(daily_labs),
new_hospital = sum(hospital_new, na.rm = T),
new_icu = sum(icu_new, na.rm = T)) %>%
mutate(`% Positive` = paste0(round(100*(total_positive/total_tests), 2), "%")) %>%
select(Month = month_name, `No. of Days Data` = no_obs,
Tests = total_tests, Positive = total_positive, `% Positive`,
`Hospital Admissions` = new_hospital, `ICU Admissions` = new_icu)
t(covid_table[,-1])
| January | February | March | April | May | June | July | August | September | October | November | December | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| No. of Days Data | 31 | 28 | 45 | 33 | 31 | 30 | 31 | 31 | 30 | 31 | 30 | 31 |
| Tests | 705531 | 456157 | 493779 | 193796 | 174997 | 85177 | 203235 | 206960 | 341171 | 453767 | 329966 | 410091 |
| Positive | 100232 | 24177 | 20533 | 21171 | 4901 | 611 | 681 | 2867 | 7578 | 25971 | 11333 | 24857 |
| % Positive | 14.21% | 5.3% | 4.16% | 10.92% | 2.8% | 0.72% | 0.34% | 1.39% | 2.22% | 5.72% | 3.43% | 6.06% |
| Hospital Admissions | 3228 | 1311 | 668 | 1024 | 419 | 85 | 35 | 65 | 177 | 669 | 424 | 539 |
| ICU Admissions | 487 | 206 | 100 | 161 | 59 | 13 | 5 | 9 | 31 | 77 | 70 | 92 |
In order to make it easier to plot multiple variables using ggplot2, the data must be transformed from ‘wide’ to ‘long’. The current format of the data has 1 row for each date, once transformed the data will have a row for each date and variable combination.
| DateStamp | hospital_cases | icu_cases | rolling14_positive | roling14_percentage |
|---|---|---|---|---|
| 2021-04-03 | 242 | 65 | 8211 | 3.3576 |
| 2021-04-02 | 264 | 62 | 8365 | 3.4198 |
| 2021-04-01 | 274 | 63 | 8313 | 3.4827 |
| 2021-03-31 | 297 | 64 | 8267 | 3.5530 |
| 2021-03-30 | 310 | 65 | 8228 | 3.6165 |
| 2021-03-29 | 331 | 70 | 8219 | 3.6700 |
The pivot_longer function will transform the data to a long format. The statistics present in the new data can be chosen from the current columns.
stats_key <-
c("hospital_cases" = "Cases in Hospitals",
"icu_cases" = "Cases in ICU",
"rolling14_positive" = "Positive Tests (14 Day Total)",
"roling14_percentage" = "% Positive Tests (14 Day Total)")
testing_plot_data <-
covid_tidy %>%
tidyr::pivot_longer(cols = names(stats_key),
names_to = "stat", values_to = "value") %>%
mutate(stat = recode_factor(stat, !!!stats_key, .ordered = TRUE))
| DateStamp | stat | value |
|---|---|---|
| 2021-04-02 | rolling14_positive | 8365.0000 |
| 2021-04-02 | roling14_percentage | 3.4198 |
| 2021-04-03 | hospital_cases | 242.0000 |
| 2021-04-03 | icu_cases | 65.0000 |
| 2021-04-03 | rolling14_positive | 8211.0000 |
| 2021-04-03 | roling14_percentage | 3.3576 |
# https://en.wikipedia.org/wiki/COVID-19_pandemic_in_the_Republic_of_Ireland
key_dates <-
data.frame(event = c("First Lockdown", "Easing Restrictions", "Easing Phase 3", "Midlands Lockdown", "Dublin L3", "Level 3", "Level 5", "Level 3", "Level 5-", "Level 5", "Level 5+"),
datestamp = c("2020-03-15", "2020-05-18", "2020-06-29", "2020-08-07", "2020-09-18", "2020-10-04", "2020-10-19", "2020-12-01", "2020-12-22", "2020-12-30", "2021-01-06")) %>%
mutate(datestamp = as.Date(datestamp))
key_dates_2w <-
key_dates %>%
mutate(datestamp = datestamp + 14)
library(ggplot2)
covid_plot <-
testing_plot_data %>%
ggplot() +
geom_line(aes(x = DateStamp, y = value)) +
geom_vline(data = key_dates, aes(xintercept = datestamp), linetype = 2, colour = "brown") +
# geom_vline(data = key_dates_2w, aes(xintercept = datestamp), linetype = 3, colour = "red", size = 0.5) +
geom_text(data = key_dates, mapping = aes(label = event, y = 0, x = datestamp),
angle = 90, hjust = -0, vjust = -0.3, size = 2, colour = "brown") +
# Change y scale to right and have all plots begin scale at 0
scale_y_continuous(position = "right", limits = c(0, NA)) +
# Show each month on the x axis
scale_x_date(date_breaks = "1 month", date_labels = "%b", ) +
# Create individual plots for each statistic
facet_wrap(~stat, ncol = 2, scales = "free") +
# Add labels to the plot
labs(x = "", y = "",
title = "Republic of Ireland Covid Stats",
caption = 'Data: https://opendata-geohive.hub.arcgis.com/') +
theme_light() +
theme(text = element_text(size = 15),
axis.text.x = element_text(size = 10, angle = 0, hjust = 0))
covid_plot
The plot can be saved locally using ggsave
ggsave(file = paste0("~/Desktop/", Sys.Date(), "_covid_roi.png"),
plot = covid_plot, width = 9, height = 5, units = "in")
The data can be filter to show only certain dates using the filter function. The plot below shows data from the beginning of August.
covid_plot_aug <-
testing_plot_data %>%
# remove any data before "2020-08-01"
filter(DateStamp >= start_date) %>%
ggplot() +
geom_line(aes(x = DateStamp, y = value)) +
geom_vline(data = key_dates %>% filter(datestamp>= start_date),
aes(xintercept = datestamp), linetype = 2, colour = "brown") +
geom_text(data = key_dates%>% filter(datestamp>= start_date),
mapping = aes(label = event, y = 0, x = datestamp),
angle = 90, hjust = -0, vjust = -0.3, size = 2, colour = "brown") +
scale_y_continuous(position = "right", limits = c(0, NA)) +
scale_x_date(date_breaks = "3 week", date_labels = "%d %b") +
facet_wrap(~stat, ncol = 2, scales = "free") +
labs(x = "", y = "",
title = paste0("Republic of Ireland Since ", format(start_date, "%B")),
caption = 'Data: https://opendata-geohive.hub.arcgis.com/') +
theme_light() +
theme(text = element_text(size = 15))
covid_plot_aug
by_age_14day <-
profile_csv %>%
select(DateStamp, starts_with("Aged")) %>%
arrange(DateStamp) %>%
# Calculate rolling 14 day totals of all columns beginning with 'Aged'
mutate(across(starts_with("Aged"), my_rolling_sum)) %>%
# Pivot table to long format for columns beginning with 'Aged'
tidyr::pivot_longer(cols = starts_with("Aged"),
names_to = "Age_Group", values_to = "Cases")
| DateStamp | Age_Group | Cases |
|---|---|---|
| 2021-04-03 | Aged1to4 | 634 |
| 2021-04-03 | Aged5to14 | 1237 |
| 2021-04-03 | Aged15to24 | 1127 |
| 2021-04-03 | Aged25to34 | 1182 |
| 2021-04-03 | Aged35to44 | 1428 |
| 2021-04-03 | Aged45to54 | 895 |
by_age_14day %>%
filter(DateStamp > start_date) %>%
# Change order in which the age groups will appear for plot
mutate(Age_Group = forcats::fct_relevel(Age_Group, unique(.$Age_Group))) %>%
# Begin plot
ggplot(aes(x = DateStamp, y = Cases)) +
geom_line() +
# Create a single plot for each age group
facet_wrap(~Age_Group) +
# Specify the breaks for dates
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
labs(title = "14 Day Incidence by Age Group",
x = "", y = "") +
theme_light() +
theme(text = element_text(size = 16))
# same as above but with "HospitalisedAged"
profile_csv %>%
select(DateStamp, starts_with("HospitalisedAged")) %>%
arrange(DateStamp) %>%
mutate(across(starts_with("HospitalisedAged"), my_rolling_sum)) %>%
tidyr::pivot_longer(cols = starts_with("HospitalisedAged"),
names_to = "Age_Group", values_to = "Cases") %>%
filter(DateStamp > start_date) %>%
mutate(Age_Group = forcats::fct_relevel(Age_Group, unique(.$Age_Group))) %>%
ggplot(aes(x = DateStamp, y = Cases)) +
geom_line() +
facet_wrap(~Age_Group) +
scale_x_date(date_breaks = "3 week", date_labels = "%d %b") +
labs(title = "14 Day Admissions to Hospital by Age Group",
x = "", y = "") +
theme_light() +
theme(text = element_text(size = 16))
case_and_hospital_14day <-
profile_csv %>%
select(DateStamp, starts_with(c("Aged","HospitalisedAged"))) %>%
# use mutates to consolidate case groupings to match hospital groups
mutate(Aged4 = Aged1to4, HospitalisedAged4 = HospitalisedAged5, .keep = "unused")%>%
mutate(across(starts_with(c("Aged","HospitalisedAged")), my_rolling_sum)) %>%
tidyr::pivot_longer(cols = starts_with(c("Aged","HospitalisedAged")),
names_to = "Age_Group", values_to = "Cases")
| DateStamp | Age_Group | Cases |
|---|---|---|
| 2021-04-03 | Aged4 | 634 |
| 2021-04-03 | HospitalisedAged4 | 11 |
| 2021-04-02 | Aged4 | 674 |
| 2021-04-02 | HospitalisedAged4 | 11 |
| 2021-04-01 | Aged4 | 692 |
| 2021-04-01 | HospitalisedAged4 | 8 |
case_and_hospital_14day <-
case_and_hospital_14day %>%
# create new variable 'Hospital' to denote if the row is a value for hospital
# also remove Hospitalised from hospital age groups to match the cases grouping
mutate(Hospital = if_else(stringr::str_detect(Age_Group, "Hospital"), "Hospitalised", "Total"),
Age_Group = stringr::str_replace(Age_Group, "Hospitalised", ""))
| DateStamp | Age_Group | Cases | Hospital |
|---|---|---|---|
| 2021-04-03 | Aged4 | 634 | Total |
| 2021-04-03 | Aged4 | 11 | Hospitalised |
| 2021-04-02 | Aged4 | 674 | Total |
| 2021-04-02 | Aged4 | 11 | Hospitalised |
| 2021-04-01 | Aged4 | 692 | Total |
| 2021-04-01 | Aged4 | 8 | Hospitalised |
case_and_hospital_14day <-
case_and_hospital_14day %>%
# pivot on the new 'Hospital' variable
tidyr::pivot_wider(names_from = Hospital, values_from = Cases)
| DateStamp | Age_Group | Total | Hospitalised |
|---|---|---|---|
| 2021-04-03 | Aged4 | 634 | 11 |
| 2021-04-02 | Aged4 | 674 | 11 |
| 2021-04-01 | Aged4 | 692 | 8 |
| 2021-03-31 | Aged4 | 690 | 9 |
| 2021-03-30 | Aged4 | 677 | 9 |
# scl is used to scale the lines for the two groups
# We want the max values for both to be the limit for each
scl <-
case_and_hospital_14day %>%
filter(DateStamp >= start_date) %>%
summarise(max_tot = max(Total, na.rm = T),
max_hospital = max(Hospitalised, na.rm = T)) %>%
mutate(scale = max_tot/max_hospital) %>%
pull(scale)
# cols are colours which are colourblind friendly
cols <-
c("#E69F00", "#56B4E9", "#009E73",
"#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# library(ggtext)
case_and_hospital_14day %>%
filter(DateStamp >= start_date) %>%
mutate(Age_Group = forcats::fct_relevel(Age_Group, c("Aged4", "Aged5to14"))) %>%
ggplot() +
# Add line for total cases
geom_line(aes(x = DateStamp, y = Total, colour = "Total Cases")) +
# Add line for hospital admissions
geom_line(aes(x = DateStamp, y = Hospitalised*scl, colour = "Hospitalised")) +
# Create a separate plot for each group
facet_wrap(~Age_Group) +
# sec_axis scales the hospital admissions data and adds axis labels on the right hand side
scale_y_continuous(sec.axis = sec_axis(~./scl, name = "14 Day Hopital Admissions")) +
scale_x_date(date_breaks = "2 weeks", date_labels = "%d %b") +
# Use the custom colours
scale_colour_manual(values = cols) +
labs(title = paste0("<span style='color:", cols[2],";'>14 Day Incidence</span>"," and ",
"<span style='color:", cols[1],";'>Hospital Admissions</span>"," by Age Group"),
# title = "14 Day Incidence and Hospital Admissions by Age Group",
x = "", y = "14 Day Incidence", colour= "") +
theme_light() +
# Change elements of the plot to match the colours of the lines
theme(plot.title = ggtext::element_markdown(lineheight = 1.1),
legend.position = "none",
legend.margin=margin(-10,0,0,0),
axis.text.y.right=element_text(colour=cols[1]),
axis.ticks.y.right=element_line(colour=cols[1]),
axis.title.y.right=element_text(colour=cols[1]),
axis.text.y=element_text(colour=cols[2]),
axis.ticks.y=element_line(colour=cols[2]),
axis.title.y=element_text(colour=cols[2]),
text = element_text(size = 16),
axis.text.x = element_text(size = 12, angle = -30, hjust = 0))
case_and_hospital_14day %>%
filter(DateStamp >= (start_date + 30)) %>%
mutate(Age_Group = forcats::fct_relevel(Age_Group, c("Aged4", "Aged5to14"))) %>%
ggplot() +
# Add line for total cases
geom_tile(aes(x = DateStamp, y = Age_Group, fill = Total), size = 0.1) +
# scale_fill_viridis_c() +
# scale_fill_distiller(palette = "RdYlGn", limits = c(0, NA)) +
scale_fill_viridis_c() +
scale_x_date(date_breaks = "4 days", date_labels = "%d %b", expand = c(0,0)) +
coord_equal() +
# theme_light() +
# ggthemes::theme_tufte(base_family="Helvetica") +
labs(x = NULL, y = NULL, fill = NULL,
title = "14 Day Incidence") +
theme(axis.ticks.y=element_blank(),
# axis.text.x = element_text(hjust = 0),
legend.position = "bottom")
case_and_hospital_14day %>%
filter(DateStamp >= (start_date + 30)) %>%
mutate(Age_Group = forcats::fct_relevel(Age_Group, c("Aged4", "Aged5to14")),
Total = Total/14) %>%
ggplot() +
# Add line for total cases
geom_tile(aes(x = DateStamp, y = Age_Group, fill = Total), size = 0.1) +
# scale_fill_viridis_c() +
# scale_fill_distiller(palette = "RdYlGn", limits = c(0, NA)) +
scale_fill_viridis_c(limits = c(0, NA)) +
# viridis::scale_fill_viridis() +
scale_x_date(date_breaks = "4 days", date_labels = "%d %b", expand = c(0,0)) +
coord_equal() +
# theme_light() +
# ggthemes::theme_tufte(base_family="Helvetica") +
labs(x = NULL, y = NULL, fill = NULL,
title = "Average Daily Incidence (14 Day Average)") +
theme(axis.ticks.y=element_blank(),
# axis.text.x = element_text(hjust = 0),
legend.position = "bottom")
# # Add line for hospital admissions
# geom_line(aes(x = DateStamp, y = Hospitalised*scl, colour = "Hospitalised")) +
# # Create a separate plot for each group
# facet_wrap(~Age_Group) +
# # sec_axis scales the hospital admissions data and adds axis labels on the right hand side
# scale_y_continuous(sec.axis = sec_axis(~./scl, name = "14 Day Hopital Admissions")) +
# scale_x_date(date_breaks = "2 weeks", date_labels = "%d %b") +
# # Use the custom colours
# scale_colour_manual(values = cols) +
# labs(title = paste0("<span style='color:", cols[2],";'>14 Day Incidence</span>"," and ",
# "<span style='color:", cols[1],";'>Hospital Admissions</span>"," by Age Group"),
# # title = "14 Day Incidence and Hospital Admissions by Age Group",
# x = "", y = "14 Day Incidence", colour= "") +
# theme_light()
case_and_hospital_14day %>%
filter(DateStamp >= Sys.Date() - 10) %>%
mutate(Age_Group = forcats::fct_relevel(Age_Group, c("Aged4", "Aged5to14"))) %>%
ggplot() +
# Add line for total cases
geom_tile(aes(y = DateStamp, x = Age_Group, fill = Total), size = 0.1) +
geom_text(aes(y = DateStamp, x = Age_Group, label = Total)) +
# scale_fill_viridis_c() +
# scale_fill_distiller(palette = "RdYlGn", limits = c(0, NA)) +
scale_fill_viridis_c(limits = c(0, NA)) +
scale_y_date(date_breaks = "1 days", date_labels = "%d %b", expand = c(0,0)) +
scale_x_discrete(expand = c(0,0)) +
# coord_equal() +
# theme_light() +
# ggthemes::theme_tufte(base_family="Helvetica") +
labs(x = NULL, y = NULL, fill = NULL,
title = "14 Day Incidence") +
theme(axis.ticks.y=element_blank(),
# axis.text.x = element_text(hjust = 0),
legend.position = "none")
profile_csv %>%
mutate(NoData = 100 - (CommunityTransmission + CloseContact + TravelAbroad),
NoData = if_else(NoData < 0, 0, NoData)) %>%
# select(DateStamp, CommunityTransmission, CloseContact, TravelAbroad, NoData) %>%
tidyr::pivot_longer(cols = c(CommunityTransmission, CloseContact, TravelAbroad),#, NoData
names_to = "Origin", values_to = "Perc") %>%
tidyr::drop_na() %>%
# mutate(Origin = forcats::fct_relevel(Origin, unique(.$Origin))) %>%
ggplot(aes(x = DateStamp, y = Perc, colour = Origin)) +
geom_line(size = 0.8) +
scale_y_continuous(labels = function(x) paste0(x, "%"), limits = c(0, 75), position = "right",
sec.axis = sec_axis(~., labels = function(x) paste0(x, "%"))) +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
scale_colour_manual(values=cols) +
labs(title = "Origin of Transmission ",
y = "", x = "", colour= "") +
theme_light() +
theme(legend.margin=margin(-10,0,0,0),
text = element_text(size = 16),
legend.position = "bottom") # legend.title=element_blank()